#Replication code for Dai & Kustov. 2022.
#"When Do Politicians Use Populist Rhetoric? Populism as a Campaign Gamble"
#Political Communication.

#################
# LOAD PACKAGES # 
#################

# Remove every object (including dot-prefixed ones, via all.names) from the
# environment this script is evaluated in before the analysis starts.
# NOTE(review): when this file is source()d this also deletes the caller's
# objects, and it neither detaches packages nor resets options; running the
# script in a fresh R session is the more reliable way to get a clean state.
rm(list=ls(all=TRUE))
library(stargazer)
library(dplyr)
library(gplots)
library(ggplot2)
library(sandwich)
library(lmtest)
library(multiwayvcov)
library(sjPlot)
library(parameters)
library(lfe)
library(MASS)
library(pscl)
library(lmerTest)
library(MuMIn)
library(specr)

#################
# LOAD DATASETS # 
#################

# Read the three replication datasets; the file names are relative paths, so
# the CSV files must sit in the current working directory.
dataSubSpeechCYM1952 <- read.csv(file = "dataSubSpeechCYM1952.csv")
dataSpeechCYM1952    <- read.csv(file = "dataSpeechCYM1952.csv")
dataCYM1952          <- read.csv(file = "dataCYM1952.csv")

### List of the datasets and key variables

# dataSpeechCYM1952     Speech-level Populism by Candidate-Year-Month Polling [MAIN]
# dataCYM1952           Candidate-Year-Month Populism by Candidate-Year-Month Polling
# dataSubSpeechCYM1952  Subspeech-level Populism by Candidate-Year-Month Polling for ggplot visualizations

#IV (polling) varies only by Candidate-Year or Candidate-Year-Month [Margin/Win]
#DV (populism) varies by Subspeech [Pop_class: 1 if predicted to be populist, 0 otherwise]
#              and by Speech [Pop_prop: proportion of populist subspeeches]


##########################
# DESCRIPTIVE STATISTICS # 
##########################

##Table B1: Summary statistics

# Recode party as a 0/1 indicator (1 when Party == "R") so that it can be
# summarised together with the numeric variables below. This column is also
# used later for the fill colour in Figure B1.
dataSpeechCYM1952$Republican <- ifelse(dataSpeechCYM1952$Party == "R", 1, 0)

# Summary-statistics table for the speech-level data. The labels are matched
# to the selected columns by position, so their order must follow the column
# vector. The sample size and mean speech length in `notes` are hard-coded
# text, not computed from the data.
stargazer(dataSpeechCYM1952[c("Pop_prop", "Win", "Margin",
                              "Incumbent", "Republican", "Speech_len_01")],
          covariate.labels = c("Populism Rhetoric (Average Share)",
                               "Electoral Advantage (Binary)",
                               "Electoral Advantage (Percent)",
                               "Party Incumbency",  # was misspelled "Incubency"
                               "Party Membership (Republican)",
                               "Speech Length (Standardized)"),
          column.sep.width = "-5pt",
          notes = "Full Speech-level Data (n = 3,435). The mean speech length is 2,167 words.",
          notes.align = "l",
          nobs = FALSE, digits = 2, label = "tab:summary",
          title = "Summary Statistics", iqr = TRUE)

##Figure 2: Average Share of Populist Rhetoric across Campaigns 

# Horizontal bar chart with one bar per Year_Candidate value; bar length is the
# mean of Pop_class within that campaign, filled by Party.
# `fun` replaces the original `fun.y`, which has been deprecated since
# ggplot2 3.3.0 and triggers a deprecation warning in current releases.
ggplot(dataSubSpeechCYM1952, aes(y = Year_Candidate, x = Pop_class, fill = Party)) +
  geom_bar(position = "dodge", stat = "summary", fun = "mean") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(title = "", y = "", x = "Populist rhetoric, average share\n") +
  # Labels are assigned to the fill levels in the scale's level order.
  # NOTE(review): presumably Party sorts as "D" then "R" - confirm.
  scale_fill_manual(name = "", labels = c("Democratic", "Republican"),
                    values = c("black", "grey")) +
  theme(legend.position = "top")

##Figure B1: Number of speeches by campaign 

# Horizontal count chart: one bar per Candidate_Year (ordered by -Year), bar
# length is the number of speech rows, and fill follows the 0/1 Republican
# indicator created for Table B1.
ggplot(
  data = dataSpeechCYM1952,
  mapping = aes(
    y = reorder(Candidate_Year, -Year),
    fill = as.factor(Republican)
  )
) +
  geom_bar(stat = "count") +
  scale_fill_manual(
    name = "",
    labels = c("Democratic", "Republican"),
    values = c("black", "grey")
  ) +
  labs(title = "", y = "", x = "Number of speeches\n") +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 0),
    legend.position = "top"
  )

############
# ANALYSIS #
############

##Figure 3: Electoral Advantage and Populist Rhetoric in U.S. Presidential Speeches 

# Tighten the plot margins for this figure only. par() returns the previous
# settings, which are restored after the figure is drawn so that later base
# graphics are not affected.
old_par <- par(mar = c(5, 5, 0.5, 0.5))

# First layer: group means of Pop_prop by Win with thin (barwidth = 1)
# 95% confidence bars, plus axes and labels.
plotmeans(Pop_prop ~ Win, data = dataSpeechCYM1952, p = 0.95, ylim = c(0, 0.04),
          barcol = "black", barwidth = 1, connect = FALSE,
          xlab = "Electoral advantage", ylab = "Populist rhetoric, average share",
          main = "", n.label = FALSE, cex.lab = 1.5, cex.axis = 1)

# Second layer: the same means overlaid (add = TRUE) with thicker
# (barwidth = 2) 84% confidence bars; labels and the x axis are suppressed
# so the first layer's annotations are not overdrawn.
plotmeans(Pop_prop ~ Win, data = dataSpeechCYM1952, p = 0.84, barcol = "black",
          barwidth = 2, connect = FALSE,
          xlab = "", ylab = "", main = "", n.label = FALSE, xaxt = "n", add = TRUE)

par(old_par)

##Table 1: Populist Rhetoric as a Function of Electoral Advantage 

# Speech-level OLS models of the populist share (Pop_prop) on electoral
# advantage (Win), adding controls and fixed effects step by step:
#   (1) Win + Incumbent + Party
#   (2) + speech length + month dummies
#   (3) + year dummies
Table1_1 <- lm(Pop_prop ~ Win + Incumbent + Party, data = dataSpeechCYM1952)
Table1_2 <- lm(Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + factor(Month),
               data = dataSpeechCYM1952)
Table1_3 <- lm(Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + factor(Month) +
                 factor(Year), data = dataSpeechCYM1952)

# Heteroskedasticity-robust (HC2) standard errors, passed to stargazer via `se`.
rob.se.Table1_1 <- sqrt(diag(vcovHC(Table1_1, type = "HC2")))
rob.se.Table1_2 <- sqrt(diag(vcovHC(Table1_2, type = "HC2")))
rob.se.Table1_3 <- sqrt(diag(vcovHC(Table1_3, type = "HC2")))


# Regression table. Each `add.lines` row is a label followed by exactly one
# entry per model (three here); the original rows carried two surplus "Yes"
# entries copied from the five-model Table B3.
stargazer(Table1_1, Table1_2, Table1_3,
          se = list(rob.se.Table1_1, rob.se.Table1_2, rob.se.Table1_3),
          title = "Populist Rhetoric as a Function of Electoral Advantage (Speech Level)",
          dep.var.caption = "", dep.var.labels = "",
          covariate.labels = c("Electoral Advantage", "Party Incumbency", "Partisanship (GOP)"),
          align = TRUE, no.space = TRUE, column.sep.width = "-15pt",
          label = "tab:dataSpeechCYM",
          omit = c("Year", "Month", "Candidate", "Speech_len_01", "Constant"),
          omit.stat = c("rsq", "ser", "f"), notes = "",
          notes.append = FALSE, notes.label = "",
          star.char = c("*", "**", "***"), star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(c("Speech Length", "No", "Yes", "Yes"),
                           c("Month FE", "No", "Yes", "Yes"),
                           c("Year FE", "No", "No", "Yes")))

# ROBUSTNESS CHECKS

## Table 2: Mixed models 

# Intercept-only model with random intercepts for Month, Year and Candidate.
RE_model <- lmer(
  Pop_prop ~ (1 | Month) + (1 | Year) + (1 | Candidate),
  data = dataSpeechCYM1952
)
# Same random-intercept structure plus the fixed-effect covariates.
ME_model <- lmer(
  Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + (1 | Month) +
    (1 | Year) + (1 | Candidate),
  data = dataSpeechCYM1952
)

# Overwrite the class attribute of both fits with "lmerMod".
# NOTE(review): presumably needed so that stargazer recognises the models
# returned by lmerTest's lmer() - confirm.
class(RE_model) <- "lmerMod"
class(ME_model) <- "lmerMod"

# Table 2. The pseudo-R^2, SD and ICC rows in `add.lines` are hard-coded
# strings, not values computed in this script.
stargazer(
  RE_model, ME_model,
  title = "Populist Rhetoric as a Function of Electoral Advantage (Mixed Models)",
  dep.var.caption = "",
  dep.var.labels = "",
  column.labels = c("Random Effects", "Mixed Effects"),
  covariate.labels = c("Electoral Advantage", "Party Incumbency", "Partisanship (GOP)"),
  align = TRUE,
  no.space = TRUE,
  column.sep.width = "-15pt",
  label = "tab:dataSpeechCYM_ME",
  omit = c("Constant", "Speech_len_01"),
  omit.stat = c("aic", "bic"),
  notes = "",
  notes.append = FALSE,
  notes.label = "",
  star.char = c("*", "**", "***"),
  star.cutoffs = c(0.05, 0.01, 0.001),
  add.lines = list(
    c("Pseudo-R^2 (marginal)", "0", "0.033"),
    c("Pseudo-R^2 (conditional)", "0.494", "0.496"),
    c("Months SD/ICC (N = 11)", "0.019/3.3\\%", "0.018/3.3\\%"),
    c("Years SD/ICC (N = 17)", "0.031/9.4\\%", "0.031/9.2\\%"),
    c("Candidates SD/ICC (N = 24)", "0.062/36.7\\%", "0.060/35.3\\%"),
    c("Residual SD/ICC", "0.073/50.6\\%", "0.073/52.1\\%")
  )
)


## Table B2: Populist Rhetoric as a Function of Electoral Advantage (Robustness Checks)

# (1) Sub-speech-level model: the binary Pop_class indicator on Win, controls,
#     and month and year dummies (no speech-length control).
B2_model1 <- lm(Pop_class ~ Win + Incumbent + Party + factor(Month) + factor(Year),
                data = dataSubSpeechCYM1952)
# (2) Speech-level model with month, year and candidate dummies.
B2_model2 <- lm(Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + factor(Month) +
                  factor(Year) + factor(Candidate), data = dataSpeechCYM1952)

# (3) Same specification as (2) after dropping rows whose Candidate is
#     "Donald J. Trump".
dataSpeechCYM1952T <- dataSpeechCYM1952[dataSpeechCYM1952$Candidate != "Donald J. Trump",]
B2_model3 <- lm(Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + factor(Month) +
                  factor(Year) + factor(Candidate), data = dataSpeechCYM1952T)

# Regression table. Each `add.lines` row is a label plus one entry per model
# (three here); the original "Month FE" and "Year FE" rows carried a fourth,
# surplus "Yes".
stargazer(B2_model1, B2_model2, B2_model3,
          title = "Populist Rhetoric as a Function of Electoral Advantage (Robustness Checks)",
          dep.var.caption = "", dep.var.labels = c("Sub-speech Level", "Speech Level"),
          covariate.labels = c("Electoral Advantage", "Party Incumbency", "Partisanship (GOP)"),
          align = TRUE, no.space = TRUE, column.sep.width = "-15pt",
          label = "tab:dataSpeechCYM_RC",
          omit = c("Year", "Month", "Candidate", "Constant", "Speech_len_01"),
          omit.stat = c("rsq", "ser", "f"),
          notes = "", notes.append = FALSE, notes.label = "",
          star.char = c("*", "**", "***"), star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(c("Speech Length", "N/A", "Yes", "Yes"),
                           c("Month FE", "Yes", "Yes", "Yes"),
                           c("Year FE", "Yes", "Yes", "Yes"),
                           c("Candidate FE", "No", "Yes", "Yes"),
                           c("Excluding D. Trump", "No", "No", "Yes")))

##Table B3: Populist Rhetoric as a Function of Electoral Advantage Sep.-Nov. Only 

# Keep only speeches from September, October and November.
# `%in%` never returns NA, so rows with a missing Month are simply dropped;
# the original chain of `==` comparisons would have inserted all-NA rows for
# them into the subset.
dataSpeechCYM31952 <- dataSpeechCYM1952[
  dataSpeechCYM1952$Month %in% c("September", "October", "November"), ]

# Same stepwise specifications as the main speech-level table, on the subset:
#   (1) baseline, (2) + speech length and month dummies, (3) + year dummies,
#   (4) + candidate dummies, (5) as (4) without "Donald J. Trump" rows.
B3_model1 <- lm(Pop_prop ~ Win + Incumbent + Party, data = dataSpeechCYM31952)
B3_model2 <- lm(Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + factor(Month),
                data = dataSpeechCYM31952)
B3_model3 <- lm(Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + factor(Month) +
                  factor(Year), data = dataSpeechCYM31952)
B3_model4 <- lm(Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + factor(Month) +
                  factor(Year) + factor(Candidate), data = dataSpeechCYM31952)
dataSpeechCYM31952T <- dataSpeechCYM31952[dataSpeechCYM31952$Candidate != "Donald J. Trump",]
B3_model5 <- lm(Pop_prop ~ Win + Incumbent + Party + Speech_len_01 + factor(Month) +
                  factor(Year) + factor(Candidate), data = dataSpeechCYM31952T)

# Regression table; each `add.lines` row has one entry per model (five).
stargazer(B3_model1, B3_model2, B3_model3, B3_model4, B3_model5,
          title = "Populist Rhetoric as a Function of Electoral Advantage (Sep.-Nov. Only)",
          dep.var.caption = "", dep.var.labels = "",
          covariate.labels = c("Electoral Advantage", "Party Incumbency", "Partisanship (GOP)"),
          align = TRUE, no.space = TRUE, column.sep.width = "-15pt",
          label = "tab:dataSpeechCYM3",
          omit = c("Year", "Month", "Candidate", "Constant", "Speech_len_01"),
          omit.stat = c("rsq", "ser", "f"), notes = "",
          notes.append = FALSE, notes.label = "",
          star.char = c("*", "**", "***"), star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(c("Speech Length", "No", "Yes", "Yes", "Yes", "Yes"),
                           c("Month FE", "No", "Yes", "Yes", "Yes", "Yes"),
                           c("Year FE", "No", "No", "Yes", "Yes", "Yes"),
                           c("Candidate FE", "No", "No", "No", "Yes", "Yes"),
                           c("Excluding D. Trump", "No", "No", "No", "No", "Yes")))

##Table B4: Populist Rhetoric as a Function of Electoral Advantage (Candidate-Year-Month Level)

# Models on the candidate-year-month dataset: a bivariate regression of
# Pop_prop on Win, then the same with year and month dummies.
B4_model1 <- lm(Pop_prop ~ Win, data = dataCYM1952)
B4_model2 <- lm(Pop_prop ~ Win + factor(Year) + factor(Month),
                data = dataCYM1952)

# Table B4. Unlike the other tables, this one adds a "+" marker at the
# 0.1 level.
stargazer(
  B4_model1, B4_model2,
  title = "Populist Rhetoric as a Function of Electoral Advantage (Candidate-Year-Month)",
  dep.var.caption = "",
  dep.var.labels = "",
  covariate.labels = c("Electoral Advantage"),
  label = "tab:dataCYM",
  align = TRUE,
  no.space = TRUE,
  column.sep.width = "-5pt",
  omit = c("Year", "Month"),
  omit.stat = c("rsq", "ser", "f", "adj.rsq"),
  notes = "",
  notes.append = FALSE,
  notes.label = "",
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  add.lines = list(
    c("Month FE", "No", "Yes"),
    c("Year FE", "No", "Yes")
  )
)

##Table B5: Populist Rhetoric as a Function of Electoral Advantage (Rare Event Models)

# Count outcome for the rare-event models: for every speech, sum the 0/1
# Pop_class indicator over its sub-speeches.
dataSubSpeechCYM1952_count <- dataSubSpeechCYM1952 %>%
  group_by(Speech_index) %>%
  summarise(Pop_count = sum(Pop_class))

# Drop any Pop_count column left over from an earlier run of this section.
# Without this, re-running the merges below yields Pop_count.x / Pop_count.y
# and the models that reference Pop_count fail. Assigning NULL is a no-op
# when the column does not exist.
dataSubSpeechCYM1952$Pop_count <- NULL
dataSpeechCYM1952$Pop_count <- NULL

#Merge with the full datasets
# merge() performs an inner join on Speech_index by default, so only rows
# with a matching Speech_index are kept.
dataSubSpeechCYM1952 <- merge(dataSubSpeechCYM1952, dataSubSpeechCYM1952_count, by = "Speech_index")
dataSpeechCYM1952 <- merge(dataSpeechCYM1952, dataSubSpeechCYM1952_count, by = "Speech_index")

#Run a regular negative binomial

# Negative binomial models of the per-speech populist count, adding controls
# step by step: baseline; + sub-speech count and month dummies; + year
# dummies; + candidate dummies. Each fit is followed by its summary.
B5_model1 <- glm.nb(
  Pop_count ~ Win + Incumbent + Party,
  data = dataSpeechCYM1952
)
summary(B5_model1)

B5_model2 <- glm.nb(
  Pop_count ~ Win + Incumbent + Party + SubSpeechCounts + factor(Month),
  data = dataSpeechCYM1952
)
summary(B5_model2)

B5_model3 <- glm.nb(
  Pop_count ~ Win + Incumbent + Party + SubSpeechCounts + factor(Month) +
    factor(Year),
  data = dataSpeechCYM1952
)
summary(B5_model3)

B5_model4 <- glm.nb(
  Pop_count ~ Win + Incumbent + Party + SubSpeechCounts + factor(Month) +
    factor(Year) + factor(Candidate),
  data = dataSpeechCYM1952
)
summary(B5_model4)

# Zero-inflated negative binomial models. In the formula, the part before
# `|` is the count component and the part after it is the zero-inflation
# component.
B5_model5 <- zeroinfl(
  Pop_count ~ Win | Win,
  data = dataSpeechCYM1952,
  dist = "negbin"
)
summary(B5_model5)

# The zero-inflation component below omits SubSpeechCounts.
B5_model6 <- zeroinfl(
  Pop_count ~ Win + Incumbent + Party + SubSpeechCounts +
    factor(Month) + factor(Year) |
    Win + Incumbent + Party + factor(Month) + factor(Year),
  data = dataSpeechCYM1952,
  dist = "negbin"
)
summary(B5_model6)


# Table B5. Each zero-inflated model is passed twice so the table has eight
# columns; per the authors' note, the zero-inflation coefficients are then
# entered manually from the model summaries.
stargazer(
  B5_model1, B5_model2, B5_model3, B5_model4,
  B5_model5, B5_model5, B5_model6, B5_model6, #zero-inflated coefficients are entered manually from model summary
  title = "Populist Rhetoric as a Function of Electoral Advantage",
  dep.var.caption = "",
  dep.var.labels = "",
  covariate.labels = c("Electoral Advantage", "Party Incumbency", "Partisanship (GOP)"),
  align = TRUE,
  no.space = TRUE,
  column.sep.width = "-25pt",
  label = "tab:dataSpeechCYM_NL",
  # Incumbent and Party are omitted from display here and reported through
  # the "Candidate Controls" row instead.
  omit = c("Year", "Month", "Candidate", "Constant", "SubSpeechCounts", "Incumbent", "Party"),
  omit.stat = c("aic", "theta"),
  notes = "",
  notes.append = FALSE,
  notes.label = "",
  star.char = c("*", "**", "***"),
  star.cutoffs = c(0.05, 0.01, 0.001),
  add.lines = list(
    c("Candidate Controls", "Yes", "Yes", "Yes", "Yes", "No", "No", "Yes", "Yes"),
    c("Sub-Speech Count", "No", "Yes", "Yes", "Yes", "No", "No", "Yes", "No"),
    c("Month FE", "No", "Yes", "Yes", "Yes", "No", "No", "Yes", "Yes"),
    c("Year FE", "No", "No", "Yes", "Yes", "No", "No", "Yes", "Yes"),
    c("Candidate FE", "No", "No", "No", "Yes", "No", "No", "No", "No")
  )
)

